#################################################################
# Normalization of Censorship: Evidence from China
# Replication Files (Observational Study, Online Appendices)
#
# Author: Tony Zirui Yang
# Date: February 5, 2024
#
# The lines below reproduce the Tables
# related to the observational study reported in the appendices
#################################################################

rm(list=ls())

library("caret")
library("irr")
library("data.table")

# Load the data
TableD1.data <- fread("TableD1.csv")
TableE1.data <- fread("TableE1.csv")
TableE2.data <- fread("TableE2.csv")
TableE3.data <- fread("TableE3.csv")


## ------------------------------
## Table D1
## ------------------------------

# Create a confusion matrix
confusion_matrix <- confusionMatrix(data = factor(TableD1.data$Coder2, levels = unique(TableD1.data$Coder2)),
                                    reference = factor(TableD1.data$Coder1, levels = unique(TableD1.data$Coder1)))

# Calculate precision, recall, and F1-score by category
Table.D1 <- round(confusion_matrix$byClass[, c("Precision", "Recall", "F1")], 2)

# Calculate macro precision, recall, and F1-score
Macro <- round(c(mean(Table.D1[,"Precision"]),
                 mean(Table.D1[,"Recall"]),
                 mean(Table.D1[,"F1"])),2)

# Sort the rows alphabetically by category name
Table.D1 <- as.data.frame(t(Table.D1[order(rownames(Table.D1)), ]))

# Modify the column names
colnames(Table.D1) <- substr(colnames(Table.D1),
                                        nchar(colnames(Table.D1)) - 2,
                                        nchar(colnames(Table.D1)))

# Attach Macro precision, recall, and F1-score
Table.D1$Macro <- Macro

Table.D1

## ------------------------------
## In Appendix Section D.2, I also claim that:
## ------------------------------

# The accuracy rate between the two coders is 82.5% when considering specific topic categories.

# The numbers in this paragraph come form the following analysis: 

coder.confusion <- table(TableD1.data$Coder1, TableD1.data$Coder2)
coder.accuracy <- paste(round(sum(diag(coder.confusion))/sum(coder.confusion),4)*100, "%", sep = "")
coder.accuracy

# When identifying whether an article is political or non-political, the two coders agree on 92.97% of the cases.

# The numbers in this paragraph come form the following analysis: 

TableD1.data$Coder1_Political <- ifelse(TableD1.data$Coder1 %in% c("COL", "CRI", "GOV"),1,0)
TableD1.data$Coder2_Political <- ifelse(TableD1.data$Coder2 %in% c("COL", "CRI", "GOV"),1,0)

coder.confusion.pol <- table(TableD1.data$Coder1_Political, TableD1.data$Coder2_Political)
coder.accuracy.pol <- paste(round(sum(diag(coder.confusion.pol))/sum(coder.confusion.pol),4)*100, "%", sep = "")
coder.accuracy.pol

# The Cohen’s κ between the two coders is 0.80,
# higher than the commonly applied criteria of 0.70 for inter-coder reliability tests.

# The numbers in this paragraph come form the following analysis: 

round(kappa2(TableD1.data[,1:2])$value,2)


## ------------------------------
## Table E1
## ------------------------------

# Create a function for calculating macro F1 score
f1_score <- function(actual, predicted) {
  confusion_matrix <- table(actual, predicted)
  precisions <- diag(confusion_matrix) / rowSums(confusion_matrix)
  recalls <- diag(confusion_matrix) / colSums(confusion_matrix)
  f1_scores <- 2 * (precisions * recalls) / (precisions + recalls)
  macro_f1 <- mean(f1_scores, na.rm = TRUE)
  return(macro_f1)
}

# Calculate the macro F1 score for each model prediction
Table.E1 <- data.frame(Macro_F1_Score = round(c(
  f1_score(TableE1.data$Original_Label, TableE1.data$BERT_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$LogitRegression_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$PaLM_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$XGBoost_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$RandomForest_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$VotingClassifier_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$DecisionTree_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$NeuralNetwork_Prediction),
  f1_score(TableE1.data$Original_Label, TableE1.data$Word2Vec_Prediction)),4))

rownames(Table.E1) <- c("Fine-tuned Pre-Train Chinese BERT",
                        "Logistic Regression (Ridge)",
                        "Pattern Learning and Matching (PaLM)",
                        "Extreme Gradient Boosting (XGBoost)",
                        "Random Forest",
                        "Ensemble Classifier (Voting)",
                        "Decision Tree",
                        "Neural Network",
                        "Word2Vec Embedding")

Table.E1


## ------------------------------
## Table E2
## ------------------------------

# Create a confusion matrix
confusion_matrix.E2 <- confusionMatrix(data = factor(TableE2.data$Outsample_Prediction, levels = unique(TableE2.data$Outsample_Prediction)),
                                    reference = factor(TableE2.data$Coder1, levels = unique(TableE2.data$Coder1)))

# Calculate precision, recall, and F1-score by category
Table.E2 <- round(confusion_matrix.E2$byClass[, c("Precision", "Recall", "F1")], 2)

# Calculate macro precision, recall, and F1-score
Macro.E2 <- round(c(mean(Table.E2[,"Precision"]),
                 mean(Table.E2[,"Recall"]),
                 mean(Table.E2[,"F1"])),2)

# Sort the rows alphabetically by category name
Table.E2 <- as.data.frame(t(Table.E2[order(rownames(Table.E2)), ]))

# Modify the column names
colnames(Table.E2) <- substr(colnames(Table.E2),
                             nchar(colnames(Table.E2)) - 2,
                             nchar(colnames(Table.E2)))

# Attach Macro precision, recall, and F1-score
Table.E2$Macro <- Macro.E2

Table.E2


## ------------------------------
## In Appendix Section E.2, I also claim that:
## ------------------------------

# The in-sample accuracy rate of the BERT model is 0.96.

# The numbers in this paragraph come form the following analysis: 

Insample.confusion <- table(TableE2.data$Coder1, TableE2.data$Insample_Prediction)
Insample.accuracy <- round(sum(diag(Insample.confusion))/sum(Insample.confusion),2)
Insample.accuracy

# However, one concern arises regarding potential imbalances within the nine categories.
# To address this issue, I calculate category-specific weights to adjust for variations in category sizes.
# This approach allows for a more accurate assessment of the balanced performance of the BERT model.
# The results indicate a balanced precision of 0.71, a balanced recall of 0.73,
# and a balanced macro F1 score of 0.72.

# The numbers in this paragraph come form the following analysis: 

weights <- 1/(sum(1/table(TableE2.data$Coder1))) * 1/table(TableE2.data$Coder1)

weighted_precision <- round(sum(as.numeric(Table.E2["Precision",1:9]) * weights),2)
weighted_precision

weighted_recall  <- round(sum(as.numeric(Table.E2["Recall",1:9]) * weights),2)
weighted_recall

weighted_macro_F1 <- (weighted_recall + weighted_precision)/2
weighted_macro_F1

# Furthermore, I combine the three highly political categories,
# as well as the remaining six non-political categories, transforming the classification task into a binary one.
# When determining whether an article falls into the highly political category or not,
# the BERT model excels with a balanced precision of 0.83,
# a balanced recall of 0.83, and a balanced macro F1 score of 0.83.

# The numbers in this paragraph come form the following analysis: 

TableE2.data$Coder_Political <- ifelse(TableE2.data$Coder1 %in% c("COL", "CRI", "GOV"),1,0)
TableE2.data$Outsample_Political <- ifelse(TableE2.data$Outsample_Prediction %in% c("COL", "CRI", "GOV"),1,0)

confusion_pol <- table(TableE2.data$Outsample_Political, TableE2.data$Coder_Political)

weights_pol <- 1/(sum(1/table(TableE2.data$Coder_Political))) * 1/table(TableE2.data$Coder_Political)

weighted_precision_pol <- round(sum(diag(confusion_pol) / rowSums(confusion_pol) * weights_pol),2)
weighted_precision_pol

weighted_recall_pol  <- round(sum(diag(confusion_pol) / colSums(confusion_pol) * weights_pol),2)
weighted_recall_pol

weighted_macro_F1_pol <- round((weighted_recall_pol + weighted_precision_pol)/2,2)
weighted_macro_F1_pol

## ------------------------------
## Table E3
## ------------------------------

# Calculate the proportion of each specific category
TableE3.proportion <- round(table(TableE3.data$Multinomial_Prediction)/nrow(TableE3.data),4)*100

Table.E3 <- data.frame(
  General_Category = c(rep("Highly Political",4),rep("Moderately Political",3),rep("Non-Political",5)),
  Specific_Category = c("Collective Action", "Govt Criticism", "Other Govt-related", "Total",
                        "Business", "Foreign", "Total",
                        "Entertainment", "Advertisement", "Culture", "Others", "Total"),
  Logistical_Regression_Ridge = c(TableE3.proportion["COL"],TableE3.proportion["CRI"],TableE3.proportion["GOV"],
                                  sum(TableE3.proportion[c("COL", "CRI", "GOV")]),
                                  TableE3.proportion["BET"],TableE3.proportion["FOR"],
                                  sum(TableE3.proportion[c("BET", "FOR")]),
                                  TableE3.proportion["ESX"],TableE3.proportion["ADS"],TableE3.proportion["LCT"],TableE3.proportion["OTH"],
                                  sum(TableE3.proportion[c("ESX", "ADS", "LCT", "OTH")]))
  )

Table.E3[,"Logistical_Regression_Ridge"] <- paste(Table.E3[,"Logistical_Regression_Ridge"], "%", sep = "")

Table.E3

